##
## GENERAL INFORMATION ON HOUSEHOLDS AND THEIR LIVING STANDARDS ----------------
## 

# Start from an empty workspace: remove every object in the current
# environment. Note that this neither detaches packages nor resets options.
rm(list = ls())

##
## LOAD PACKAGES ---------------------------------------------------------
## 

library(tidyverse)
library(countrycode)
library(sf)
library(rgdal)
library(RColorBrewer)
library(ggpubr)


##
## LOAD DATA -------------------------------------------------------------
##

# Household-level and regional-level DLS data (.RData files, resolved relative
# to the working directory). The objects `dls` and `dls_region` used below are
# expected to be restored from these files.
load(file = "Complete DLS data file_household level.RData")
load(file = "Complete DLS data file_regional level.RData")

# Number of households = number of rows in the household-level data
dim(dls)[1]
# 4712696 households

# Countries in the data: one output row per country / world-region pair,
# with `n` = number of rows of `dls_region` belonging to that pair
dls_region %>%
  group_by(country_name, worldregion) %>%
  summarise(n = n(), .groups = "keep")

# How many regions are in the data in their last wave?
# The latest wave is determined separately for every region (region id x
# country x world region). Missing `wave` values are ignored (na.rm = TRUE) so
# that a single NA cannot drop an entire region, and so that this count follows
# the same "last wave" rule as the regional subset (`aux2`) it is later
# compared against.
# The result is a one-cell tibble (column `n`) counting the rows of
# `dls_region` that fall in each region's latest wave -- presumably one row per
# region and wave; verify against the data.
number_regions <- dls_region %>%
  group_by(region_num_raw, country_name, worldregion) %>%
  mutate(wave_max = max(wave, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(wave == wave_max) %>%
  count()
number_regions
#1103 subnational regions

# Surveys in the data: one row per survey id, with `n` = number of
# `dls_region` rows belonging to that survey (the number of surveys is the
# number of rows of the printed tibble)
number_surveys <- dls_region %>%
  group_by(SurveyId) %>%
  summarise(n = n(), .groups = "keep")
number_surveys

# Distribution of years across waves: earliest and latest survey year observed
# in each wave (missing survey years are ignored)
check <- dls %>%
  group_by(wave) %>%
  summarize(
    year_min = min(survey_year, na.rm = TRUE),
    year_max = max(survey_year, na.rm = TRUE)
  )

# Shares of households without all living standards achieved in last DLS wave.
# First keep only the households observed in the most recent wave of their
# region (region id x country). Missing `wave` values are ignored when
# determining the latest wave (na.rm = TRUE), matching the rule used for the
# regional-level subset, so that one NA does not remove a whole region.
aux1 <- dls %>%
  group_by(region_num_raw, country_name) %>%
  mutate(wave_max = max(wave, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(wave == wave_max)

# For each threshold indicator, print first the share and then the number of
# last-wave households per indicator value. table() leaves out households
# with a missing value, so shares refer to non-missing observations only.
invisible(lapply(
  c("dls_min10", "dls_min7", "dls_min5"),
  function(indicator) {
    household_counts <- table(aux1[[indicator]])
    print(prop.table(household_counts))
    print(household_counts)
  }
))

# How many regions where less than 1% have achieved all DLS indicators?
# Restrict the regional data to each region's most recent wave first; missing
# `wave` values are ignored when determining that wave.
aux2 <- dls_region %>%
  group_by(region_num_raw, country_name) %>%
  mutate(wave_max = max(wave, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(wave == wave_max)

# Number of last-wave regions with a share below 1%. filter() drops rows whose
# share is missing, so those regions are not counted here.
regions_less1percent <- aux2 %>%
  filter(dls_min10_region < 0.01) %>%
  count()
regions_less1percent

# Same figure as a fraction of all last-wave regions. Both operands are
# one-cell count() results, so the division is element-wise on column `n`.
regions_less1percent / number_regions

# Regional threshold flags (1 = condition holds, 0 = it does not, NA = share
# missing), followed by the number of last-wave regions per flag value:
#   - dls_min10_region below 5%   (all DLS indicators achieved)
#   - dls_min10_region above 25%
#   - dls_min7_region above 50%   (presumably the "2/3 of DLS dimensions"
#                                  threshold -- confirm)
#   - dls_min5_region above 50%   (presumably the "1/2 of DLS dimensions"
#                                  threshold -- confirm)
aux2 <- aux2 %>%
  mutate(
    dls_min10_region_binary_lessthan5percent = as.numeric(dls_min10_region < 0.05),
    dls_min10_region_binary_morethan25percent = as.numeric(dls_min10_region > 0.25),
    dls_min7_region_binary_morethan50percent = as.numeric(dls_min7_region > 0.5),
    dls_min5_region_binary_morethan50percent = as.numeric(dls_min5_region > 0.5)
  )

table(aux2$dls_min10_region_binary_lessthan5percent)
table(aux2$dls_min10_region_binary_morethan25percent)
table(aux2$dls_min7_region_binary_morethan50percent)
table(aux2$dls_min5_region_binary_morethan50percent)

# Differences between dimensions: for every DLS dimension, print the share of
# last-wave households per value of that dimension's column (households with
# a missing value are excluded by table())
invisible(lapply(
  c(
    "dim1_housing",
    "dim2_thermal",
    "dim3_nutrition",
    "dim4_foodprep",
    "dim5_water",
    "dim6_sanitation",
    "dim7_health",
    "dim8_education",
    "dim9_socialconnect",
    "dim10_physicalconnect"
  ),
  function(dimension) {
    print(prop.table(table(aux1[[dimension]])))
  }
))

# Differences in access by world region.
# Derive the world region from the country name; this overwrites any
# `worldregion` column already present in aux1.
# NOTE(review): country names that countrycode() cannot match end up as NA --
# check the table below for unexpected missing regions.
aux1$worldregion <- countrycode(
  aux1$country_name,
  origin = "country.name",
  destination = "region"
)
table(aux1$worldregion)

# Mean of the household-level indicators per world region, ignoring missing
# values
aux1 %>%
  group_by(worldregion) %>%
  summarise(dls_min10 = mean(dls_min10, na.rm = TRUE))

aux1 %>%
  group_by(worldregion) %>%
  summarise(dls_min7 = mean(dls_min7, na.rm = TRUE))


